##################################################################################
###Replication File for Lacombe (2018), "Political Weaponization of Gun Owners"###
##################################################################################

###TOPIC MODEL###
library(stm)
#loading and prepping data
# The .Rdata file is expected to supply the object `processed` (with
# $documents, $vocab and $meta), which is used on the next line.
load(file = "RiflemanDataProcessed.Rdata")
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta
seed <- 567
#estimate topic model
# Six topics; prevalence varies with a spline in Year, content varies by Author.
stm.rep <- stm(docs, vocab, K = 6, prevalence = ~ s(Year), content = ~ Author,
               max.em.its = 250, seed = seed, data = meta)
#identify top words and send them to a txt file
sagelabs.stm <- sageLabels(stm.rep, n = 10)
# Explicit print(): a bare object name is only auto-printed at the interactive
# top level, so under source() the previous sink()/sink() pair wrote an empty
# file. capture.output() also restores normal output if printing fails.
capture.output(print(sagelabs.stm), file = "sagelabs.stm.txt")
#model to create topic model figures in online appendix
# `metadata` is spelled out instead of relying on partial matching of `meta=`.
prep.stm.rep <- estimateEffect(1:6 ~ s(Year), stm.rep, metadata = meta)
#creating appendix figure 1
# One line colour per topic, in topic order 1-6; the legend reuses the vector.
color <- c("darkgray", "red", "black", "darkblue", "gold", "forestgreen")
# NOTE(review): ci.level is passed as a logical here; stm documents it as a
# numeric confidence level — confirm the intended value.
plot(
  prep.stm.rep,
  covariate = "Year",
  method = "continuous",
  topics = 1:6,
  main = quote(paste(italic("American Rifleman"), "Editorial Topics")),
  xlab = "Year",
  ylab = "Topic Frequency",
  ylim = c(0, .55),
  linecol = color,
  labeltype = "custom",
  printlegend = FALSE,
  ci.level = FALSE
)
legend(
  x = 1929, y = .56,
  legend = c(
    "Shooting Sports and Military Preparedness",
    "Membership Programs and Benefits",
    "Crime, Self-Defense, and Guns",
    "Gun Regulation",
    "Second Amendment",
    "Americanism and Guns"
  ),
  col = color, lwd = 4,
  cex = 1, y.intersp = .4, text.width = 24, bty = "y"
)
# Decade ticks on the x axis, 0.1 steps on the y axis.
axis(1, at = seq(1930, 2010, by = 10))
axis(2, at = (0:5) / 10)
#creating appendix figure 2
# Colours for topics 3-6 only; the legend reuses the vector.
color <- c("black", "darkblue", "gold", "forestgreen")
# NOTE(review): ci.level is passed as a logical here; stm documents it as a
# numeric confidence level — confirm the intended value.
plot(
  prep.stm.rep,
  covariate = "Year",
  method = "continuous",
  topics = 3:6,
  main = "Political Topics",
  xlab = "Year",
  ylab = "Topic Frequency",
  ylim = c(0, .4),
  linecol = color,
  labeltype = "custom",
  printlegend = FALSE,
  ci.level = FALSE
)
legend(
  x = 1929, y = .4,
  legend = c(
    "Crime, Self-Defense, and Guns",
    "Gun Regulation",
    "Second Amendment",
    "Americanism and Guns"
  ),
  col = color, lwd = 4,
  cex = 1, y.intersp = .3, text.width = 17.5, bty = "y"
)
# Decade ticks on the x axis, 0.1 steps on the y axis.
axis(1, at = seq(1930, 2010, by = 10))
axis(2, at = (0:4) / 10)
#creating appendix figures 3 to 6
# One single-topic plot per entry, drawn in this order (figure 3, 4, 5, 6).
# NOTE(review): the legends of appendix figures 1 and 2 list topic 3 as
# "Crime, Self-Defense, and Guns" and topic 4 as "Gun Regulation", the reverse
# of the titles paired with topics 3 and 4 here — confirm which is intended.
# NOTE(review): ci.level is passed as a logical; stm documents it as a numeric
# confidence level — confirm the intended value.
single.topic.figs <- list(
  list(topic = 3, title = "Gun Regulation"),
  list(topic = 4, title = "Crime, Self-Defense, and Guns"),
  list(topic = 5, title = "Second Amendment"),
  list(topic = 6, title = "Americanism and Guns")
)
for (fig in single.topic.figs) {
  plot(
    prep.stm.rep,
    covariate = "Year",
    method = "continuous",
    topics = fig$topic,
    main = fig$title,
    xlab = "Year",
    ylab = "Topic Frequency",
    ylim = c(0, .4),
    linecol = "black",
    labeltype = "custom",
    printlegend = FALSE,
    ci.level = TRUE
  )
  # Decade ticks on the x axis, 0.1 steps on the y axis.
  axis(1, at = seq(1930, 2010, by = 10))
  axis(2, at = (0:4) / 10)
}


###LAGGED MODEL DEPICTED IN TABLE 3###
#Model presented in table 2
# NOTE(review): the two header lines above disagree on the table number —
# confirm against the paper.
ident.measure <- read.csv("LaggedLPM.csv")

# Shift `x` down by `k` rows: the first `k` entries become NA and entry t holds
# x[t - k]. Works for any length, replacing the hard-coded 1:78 / 1:77 / 1:76
# slices that were only valid for a 79-row file.
lag_by <- function(x, k) {
  stopifnot(k >= 0, k <= length(x))
  c(rep(NA, k), x[seq_len(length(x) - k)])
}

# Identity phrases; each has a RIFLE<phrase> and a LETTER<phrase> column.
ident.frames <- c("anti.gunners", "average.citizens", "freedom.loving",
                  "ordinary.citizens", "law.abiding")

for (frame in ident.frames) {
  rifle <- paste0("RIFLE", frame)
  letter <- paste0("LETTER", frame)

  # Add <series>.lag1 ... <series>.lag3 for both series of this phrase.
  for (series in c(rifle, letter)) {
    for (k in 1:3) {
      ident.measure[[paste0(series, ".lag", k)]] <-
        lag_by(ident.measure[[series]], k)
    }
  }

  # LETTER series regressed on the current RIFLE series plus three lags of each.
  rhs <- c(rifle, paste0(rifle, ".lag", 1:3), paste0(letter, ".lag", 1:3))
  fml <- as.formula(paste(letter, "~", paste(rhs, collapse = " + ")))
  # do.call() keeps the full formula in the printed Call; print() is required
  # because values are not auto-printed inside a loop.
  print(summary(do.call("lm", list(formula = fml, data = quote(ident.measure)))))
}

#Robustness test described in footnote 12
# Same lag structure as the main models, with the direction reversed: the
# RIFLE series is the outcome and the current LETTER series is a regressor.
ident.measure <- read.csv("LaggedLPM.csv")

# Shift `x` down by `k` rows: the first `k` entries become NA and entry t holds
# x[t - k]. Works for any length, replacing the hard-coded 1:78 / 1:77 / 1:76
# slices that were only valid for a 79-row file.
lag_by <- function(x, k) {
  stopifnot(k >= 0, k <= length(x))
  c(rep(NA, k), x[seq_len(length(x) - k)])
}

# Identity phrases; each has a RIFLE<phrase> and a LETTER<phrase> column.
ident.frames <- c("anti.gunners", "average.citizens", "freedom.loving",
                  "ordinary.citizens", "law.abiding")

for (frame in ident.frames) {
  rifle <- paste0("RIFLE", frame)
  letter <- paste0("LETTER", frame)

  # Add <series>.lag1 ... <series>.lag3 for both series of this phrase.
  for (series in c(rifle, letter)) {
    for (k in 1:3) {
      ident.measure[[paste0(series, ".lag", k)]] <-
        lag_by(ident.measure[[series]], k)
    }
  }

  # RIFLE series regressed on the current LETTER series plus three lags of each.
  rhs <- c(letter, paste0(rifle, ".lag", 1:3), paste0(letter, ".lag", 1:3))
  fml <- as.formula(paste(rifle, "~", paste(rhs, collapse = " + ")))
  # do.call() keeps the full formula in the printed Call; print() is required
  # because values are not auto-printed inside a loop.
  print(summary(do.call("lm", list(formula = fml, data = quote(ident.measure)))))
}


###COSINE SIMILARITY CONTRAST CALCULATIONS###
library(quanteda)
library(readtext)

###among subset of documents with identity frames
##across ~10 year buckets
# Tokens removed before computing similarity: the listed terms (some are glob
# patterns) plus the standard English stop-word list.
stops <- c(
  "gun*", "law", "state", "use", "can", "one", "weapon", "year*", "time", "new",
  "make", "legis*", "bill", "law", "laws", "must", "need", "like", "control",
  "firearm*", "regulation*", "editor*", "author", "column", "writer", "letter",
  "article", "will", "NRA",
  stopwords("english")
)
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources that use an identity frame and are flagged NotMixed and
# NotNone (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    IdentityFrameforLeg == 1 & NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceDecade group.
# NOTE(review): dfm() on a corpus with groups/stem/remove arguments looks like
# the pre-v3 quanteda interface — confirm the installed version.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceDecade",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 9] minus similarity at [k + 1, k + 8].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:7,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 9] - sim[i + 1, i + 8]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..7 of sim[k, k + 9] - sim[k + 1, k + 8].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceDecade group
# appears in each resample, in the same order as in the full sample — confirm.
allID.responsedec.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, IdentityFrameforLeg==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceDecade", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:7
  sum(sim.temp[cbind(k, k + 9)] - sim.temp[cbind(k + 1, k + 8)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=allID.responsedec.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.2305098 with a 0.09455817 standard error, i.e. average difference
# 0.03292997, average SE 0.01350831 and 95% CI half-width 0.02647629.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/7
sd(result$t[,1])/7
#calculating confidence interval for average
(sd(result$t[,1])/7)*1.96
#results pulled into a separate csv file for figure creation (code below)

##across ~16 year buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources that use an identity frame and are flagged NotMixed and
# NotNone (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    IdentityFrameforLeg == 1 & NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceBucket group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceBucket",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 6] minus similarity at [k + 1, k + 5].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:4,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 6] - sim[i + 1, i + 5]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..4 of sim[k, k + 6] - sim[k + 1, k + 5].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceBucket group
# appears in each resample, in the same order as in the full sample — confirm.
allID.responsebucket.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, IdentityFrameforLeg==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceBucket", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:4
  sum(sim.temp[cbind(k, k + 6)] - sim.temp[cbind(k + 1, k + 5)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=allID.responsebucket.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.2002996 with a 0.05359723 standard error, i.e. average difference
# 0.0500749, average SE 0.01339931 and 95% CI half-width 0.02626264.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/4
sd(result$t[,1])/4
#calculating confidence interval for average
(sd(result$t[,1])/4)*1.96
#results pulled into a separate csv file for figure creation (code below)


##across ~13 year buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources that use an identity frame and are flagged NotMixed and
# NotNone (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    IdentityFrameforLeg == 1 & NotMixed == 1 & NotNone == 1
)
# One row per SourceStance6buck group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStance6buck",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 7] minus similarity at [k + 1, k + 6].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:5,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 7] - sim[i + 1, i + 6]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..5 of sim[k, k + 7] - sim[k + 1, k + 6].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStance6buck group
# appears in each resample, in the same order as in the full sample — confirm.
allID.response6buck.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, IdentityFrameforLeg==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStance6buck", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:5
  sum(sim.temp[cbind(k, k + 7)] - sim.temp[cbind(k + 1, k + 6)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=allID.response6buck.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.1787226 with a 0.06807615 standard error, i.e. average difference
# 0.03574452, average SE 0.01361523 and 95% CI half-width 0.02668585.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/5
sd(result$t[,1])/5
#calculating confidence interval for average
(sd(result$t[,1])/5)*1.96
#results pulled into a separate csv file for figure creation (code below)


##across 5 theoretically-driven buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources that use an identity frame and are flagged NotMixed and
# NotNone (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    IdentityFrameforLeg == 1 & NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceTheory group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceTheory",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 6] minus similarity at [k + 1, k + 5].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:4,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 6] - sim[i + 1, i + 5]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..4 of sim[k, k + 6] - sim[k + 1, k + 5].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceTheory group
# appears in each resample, in the same order as in the full sample — confirm.
allID.responsetheory.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, IdentityFrameforLeg==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceTheory", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:4
  sum(sim.temp[cbind(k, k + 6)] - sim.temp[cbind(k + 1, k + 5)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=allID.responsetheory.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.2007826 with a 0.05751131 standard error, i.e. average difference
# 0.05019565, average SE 0.01437783 and 95% CI half-width 0.02818054.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/4
sd(result$t[,1])/4
#calculating confidence interval for average
(sd(result$t[,1])/4)*1.96
#results pulled into a separate csv file for figure creation (code below)


###among all documents, not just those with identity frames
##across ~10 year buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources flagged NotMixed and NotNone; no identity-frame filter
# (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceDecade group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceDecade",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 9] minus similarity at [k + 1, k + 8].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:7,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 9] - sim[i + 1, i + 8]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..7 of sim[k, k + 9] - sim[k + 1, k + 8].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceDecade group
# appears in each resample, in the same order as in the full sample — confirm.
alldocs.responsedec.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceDecade", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:7
  sum(sim.temp[cbind(k, k + 9)] - sim.temp[cbind(k + 1, k + 8)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=alldocs.responsedec.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.2138511 with a 0.0811189 standard error, i.e. average difference
# 0.03055016, average SE 0.01158841 and 95% CI half-width 0.02271329.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/7
sd(result$t[,1])/7
#calculating confidence interval for average
(sd(result$t[,1])/7)*1.96
#results pulled into a separate csv file for figure creation (code below)


##across ~16 year buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources flagged NotMixed and NotNone; no identity-frame filter
# (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceBucket group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceBucket",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 6] minus similarity at [k + 1, k + 5].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:4,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 6] - sim[i + 1, i + 5]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..4 of sim[k, k + 6] - sim[k + 1, k + 5].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceBucket group
# appears in each resample, in the same order as in the full sample — confirm.
alldocs.responsebucket.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceBucket", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:4
  sum(sim.temp[cbind(k, k + 6)] - sim.temp[cbind(k + 1, k + 5)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=alldocs.responsebucket.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.1923259 with a 0.04245776 standard error, i.e. average difference
# 0.04808147, average SE 0.01061444 and 95% CI half-width 0.0208043.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/4
sd(result$t[,1])/4
#calculating confidence interval for average
(sd(result$t[,1])/4)*1.96
#results pulled into a separate csv file for figure creation (code below)


##across ~13 year buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources flagged NotMixed and NotNone; no identity-frame filter
# (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    NotMixed == 1 & NotNone == 1
)
# One row per SourceStance6buck group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStance6buck",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 7] minus similarity at [k + 1, k + 6].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:5,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 7] - sim[i + 1, i + 6]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..5 of sim[k, k + 7] - sim[k + 1, k + 6].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStance6buck group
# appears in each resample, in the same order as in the full sample — confirm.
alldocs.response6buck.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStance6buck", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:5
  sum(sim.temp[cbind(k, k + 7)] - sim.temp[cbind(k + 1, k + 6)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=alldocs.response6buck.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.1573354 with a 0.05631769 standard error, i.e. average difference
# 0.03146708, average SE 0.01126354 and 95% CI half-width 0.02207653.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/5
sd(result$t[,1])/5
#calculating confidence interval for average
(sd(result$t[,1])/5)*1.96
#results pulled into a separate csv file for figure creation (code below)


##across 5 theoretically-driven buckets
data <- readtext("CosineRepData.csv", text_field = "Text")
corpus <- corpus(data)
# Keep pro-gun sources flagged NotMixed and NotNone; no identity-frame filter
# (one combined filter; same documents as filtering step by step).
identcorp <- corpus_subset(
  corpus,
  (SourceStance == "ARPro-Gun" | SourceStance == "PaperPro-Gun") &
    NotMixed == 1 & NotNone == 1
)
# One row per SourceStanceTheory group; `stops` is defined earlier in the script.
bySourceDfmIdent <- dfm(
  identcorp,
  groups = "SourceStanceTheory",
  tolower = TRUE,
  stem = TRUE,
  remove = stops,
  remove_punct = TRUE
)
s1 <- textstat_simil(bySourceDfmIdent, method = "cosine", margin = "documents")
as.matrix(s1)

#calculating the sum total difference in responsiveness
# Contrast k = similarity at [k, k + 6] minus similarity at [k + 1, k + 5].
# NOTE(review): these positions depend on the row order of the grouped dfm.
diff.vec <- vapply(
  1:4,
  function(i) {
    sim <- as.matrix(s1)
    sim[i, i + 6] - sim[i + 1, i + 5]
  },
  numeric(1)
)
diff.vec
sum(diff.vec)

#bootstrap function to calculate confidence intervals
# install.packages("boot") is no longer run on every execution; install the
# package once from the console if library(boot) fails.
library(boot)
data <- readtext("CosineRepData.csv",text_field="Text")

# Bootstrap statistic for boot(): rebuilds the grouped similarity matrix from
# the resampled rows and returns the summed similarity contrast.
#   data    - data frame returned by readtext()
#   indices - row indices of the bootstrap resample, supplied by boot()
# Returns one number: the sum over k = 1..4 of sim[k, k + 6] - sim[k + 1, k + 5].
# Uses the global `stops` vector defined earlier in this script.
# NOTE(review): the positional indices assume every SourceStanceTheory group
# appears in each resample, in the same order as in the full sample — confirm.
alldocs.responsetheory.noanti.boot.fun<- function(data,indices){
  temp.data <- data[indices, ]
  temp.corpus <- corpus(temp.data)
  temp.identcorp <- corpus_subset(temp.corpus, SourceStance=="ARPro-Gun"|SourceStance=="PaperPro-Gun")
  temp.identcorp <- corpus_subset(temp.identcorp, NotMixed==1)
  temp.identcorp <- corpus_subset(temp.identcorp, NotNone==1)
  temp.bySourceDfmIdent <- dfm(temp.identcorp, groups = "SourceStanceTheory", tolower=TRUE, stem=TRUE,
                               remove = stops, remove_punct = TRUE)
  # Convert to a plain matrix once rather than on every lookup.
  sim.temp <- as.matrix(textstat_simil(temp.bySourceDfmIdent, method = "cosine", margin = "documents"))

  # Both terms of each contrast come from the resampled matrix. Previously the
  # second term was read from the global full-sample `s1`, so only half of the
  # statistic varied across bootstrap replicates.
  k <- 1:4
  sum(sim.temp[cbind(k, k + 6)] - sim.temp[cbind(k + 1, k + 5)])
}
# Fixed seed so the bootstrap standard error is reproducible between runs.
set.seed(567)
result<-boot(data=data,statistic=alldocs.responsetheory.noanti.boot.fun,R=1000)
result

#calculating confidence intervals based on result
# Values recorded from the original run of this script: sum difference
# 0.1885429 with a 0.04610287 standard error, i.e. average difference
# 0.04713573, average SE 0.01152572 and 95% CI half-width 0.02259041.
# The lines below derive the same quantities from `result` instead of from
# hand-copied numbers; with the corrected statistic the SE can differ.
#calculating average difference and SE
result$t0/4
sd(result$t[,1])/4
#calculating confidence interval for average
(sd(result$t[,1])/4)*1.96
#results pulled into a separate csv file for figure creation (code below)


#figure 1
library(ggplot2)
library(cowplot)
# Averages and CI half-widths per grouping type, assembled by hand from the
# bootstrap results computed earlier in this script.
cosine.data <- read.csv("CosineFigDataRep.csv")
# Left panel: point estimate with +/- CI error bar per grouping type, plus a
# dashed reference line at zero.
p1 <- ggplot(cosine.data, aes(x = Bucket, y = IDFrameAverage)) +
  geom_point(
    stat = "identity", position = position_dodge(),
    colour = "black", size = 3
  ) +
  geom_errorbar(
    aes(
      ymin = IDFrameAverage - CI.IDFrameAve,
      ymax = IDFrameAverage + CI.IDFrameAve
    ),
    position = position_dodge(.9), width = .2, colour = "black"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylim(-.01, .09) +
  labs(
    x = expression(Grouping~Type),
    y = expression(Cosine~Similarity~Contrast),
    title = "Among Documents with Identity Frames"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )
p1

# Right panel: same layout for the all-documents averages; the y-axis title is
# left blank.
p2 <- ggplot(cosine.data, aes(x = Bucket, y = AllDocAverage)) +
  geom_point(
    stat = "identity", position = position_dodge(),
    colour = "black", size = 3
  ) +
  geom_errorbar(
    aes(
      ymin = AllDocAverage - CI.AllDocAve,
      ymax = AllDocAverage + CI.AllDocAve
    ),
    position = position_dodge(.9), width = .2, colour = "black"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylim(-.01, .09) +
  labs(x = "Grouping Type", y = "", title = "Among All Documents") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )
p2

# Place the two panels side by side, then stack a bold title strip on top
# (title strip takes 0.1 of the height relative to 1 for the panels).
p <- plot_grid(plotlist = list(p1, p2), ncol = 2)
title <- ggdraw() +
  draw_label(
    "FIGURE 1: Average Cosine Similarity Responsiveness",
    x = .24,
    size = 16,
    fontface = "bold"
  )
pfinal <- plot_grid(title, p, ncol = 1, rel_heights = c(0.1, 1))
pfinal

###LOGIT MODEL IN TABLE 5###
# Logistic regression of CallToAction on Threat, IDLanguage and
# DiscussesPolicyDirectly, fitted to the Table 5 replication data.
t5 <- read.csv("Table5RepData.csv")
table5log <- glm(
  formula = CallToAction ~ Threat + IDLanguage + DiscussesPolicyDirectly,
  family = binomial(link = logit),
  data = t5
)
summary(table5log)

